All data is from the CDC website: https://data.cdc.gov/w/k8wy-p9cg/tdwk-ruhb?cur=voq8guDL7SP
Data is reported as of: 06/30/2021
This notebook uses the downloadable CSV version of the data (input file = 'Covid1.csv')
import pandas as pd
import numpy
# Load the CDC export (saved locally as 'Covid1.csv') into a DataFrame.
df1 = pd.read_csv(filepath_or_buffer="Covid1.csv")
# Preview the first ten rows to check the column layout before slicing by position.
df1.head(n=10)
| | Data as of | Start Date | End Date | State | County Name | Urban Rural Code | FIPS State | FIPS County | FIPS Code | Indicator | ... | COVID-19 Deaths | Non-Hispanic White | Non-Hispanic Black | Non-Hispanic American Indian or Alaska Native | Non-Hispanic Asian | Non-Hispanic Native Hawaiian or Other Pacific Islander | Hispanic | Other | Urban Rural Description | Footnote |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 6/23/21 | 1/1/20 | 6/19/21 | AK | Anchorage Municipality | 3 | 2 | 20 | 2020 | Distribution of all-cause deaths (%) | ... | 204 | 0.581 | 0.046 | 0.210 | 0.058 | 0.027 | 0.031 | 0.046 | Medium metro | NaN |
| 1 | 6/23/21 | 1/1/20 | 6/19/21 | AK | Anchorage Municipality | 3 | 2 | 20 | 2020 | Distribution of COVID-19 deaths (%) | ... | 204 | 0.324 | NaN | 0.324 | 0.147 | 0.093 | NaN | NaN | Medium metro | One or more data cells have counts between 1-9... |
| 2 | 6/23/21 | 1/1/20 | 6/19/21 | AK | Anchorage Municipality | 3 | 2 | 20 | 2020 | Distribution of population (%) | ... | 204 | 0.556 | 0.052 | 0.081 | 0.095 | 0.026 | 0.091 | 0.099 | Medium metro | NaN |
| 3 | 6/23/21 | 1/1/20 | 6/19/21 | AL | Baldwin County | 4 | 1 | 3 | 1003 | Distribution of all-cause deaths (%) | ... | 255 | 0.906 | 0.076 | NaN | NaN | NaN | 0.012 | NaN | Small metro | One or more data cells have counts between 1-9... |
| 4 | 6/23/21 | 1/1/20 | 6/19/21 | AL | Baldwin County | 4 | 1 | 3 | 1003 | Distribution of COVID-19 deaths (%) | ... | 255 | 0.863 | 0.110 | NaN | NaN | NaN | NaN | NaN | Small metro | One or more data cells have counts between 1-9... |
| 5 | 6/23/21 | 1/1/20 | 6/19/21 | AL | Baldwin County | 4 | 1 | 3 | 1003 | Distribution of population (%) | ... | 255 | 0.832 | 0.086 | 0.007 | 0.011 | 0.001 | 0.047 | 0.017 | Small metro | NaN |
| 6 | 6/23/21 | 1/1/20 | 6/19/21 | AL | Calhoun County | 4 | 1 | 15 | 1015 | Distribution of all-cause deaths (%) | ... | 302 | 0.814 | 0.171 | NaN | NaN | NaN | 0.009 | NaN | Small metro | One or more data cells have counts between 1-9... |
| 7 | 6/23/21 | 1/1/20 | 6/19/21 | AL | Calhoun County | 4 | 1 | 15 | 1015 | Distribution of COVID-19 deaths (%) | ... | 302 | 0.788 | 0.199 | NaN | NaN | NaN | NaN | NaN | Small metro | One or more data cells have counts between 1-9... |
| 8 | 6/23/21 | 1/1/20 | 6/19/21 | AL | Calhoun County | 4 | 1 | 15 | 1015 | Distribution of population (%) | ... | 302 | 0.716 | 0.210 | 0.004 | 0.009 | 0.001 | 0.041 | 0.019 | Small metro | NaN |
| 9 | 6/23/21 | 1/1/20 | 6/19/21 | AL | Colbert County | 4 | 1 | 33 | 1033 | Distribution of all-cause deaths (%) | ... | 175 | 0.822 | 0.166 | NaN | NaN | NaN | 0.009 | NaN | Small metro | One or more data cells have counts between 1-9... |
10 rows × 21 columns
# Calculate disproportionality indices for different population sub-groups.
#
# The index for a group is its share of COVID-19 deaths divided by its share
# of the population, computed per county.
#
# NOTE: rows are selected by position. This relies on every county
# contributing exactly three consecutive rows in the order shown in the input
# preview: all-cause deaths, COVID-19 deaths, population. A county with a
# missing row would silently misalign every county after it.
# Column positions used: 3 = State, 4 = County Name, 8 = FIPS Code,
# 12 / 13 / 15 / 17 = the White / Black / Asian / Hispanic share columns.


def _disproportionality_index(covid_share, population_share):
    """Return COVID-19 death share divided by population share, element-wise.

    Both arguments are pandas Series of equal length; they are divided by
    position (their indexes are ignored). Cells where the ratio is undefined
    are reported as 0: NaN (a suppressed or missing share, or 0/0) and
    +/-inf (a non-zero death share over a zero population share). Without
    the inf handling, nan_to_num would turn inf into the largest finite
    float, which would then dominate the CSV and the plot scales.
    """
    # Division by zero is expected here and handled below, so silence the
    # RuntimeWarning numpy would otherwise emit.
    with numpy.errstate(divide="ignore", invalid="ignore"):
        ratio = covid_share.values / population_share.values
    return numpy.nan_to_num(ratio, copy=False, nan=0, posinf=0, neginf=0)


covid_rows = df1.iloc[1::3]       # second row of each county triple
population_rows = df1.iloc[2::3]  # third row of each county triple

# Identifying columns are taken from the population rows, so the output keeps
# their row labels (2, 5, 8, ...) as its index.
codes = population_rows.iloc[:, 8]
states = population_rows.iloc[:, 3]
counties = population_rows.iloc[:, 4]

#Non-HS-White
vals1 = _disproportionality_index(covid_rows.iloc[:, 12], population_rows.iloc[:, 12])
#Non-HS-Black
vals2 = _disproportionality_index(covid_rows.iloc[:, 13], population_rows.iloc[:, 13])
#Non-HS-Asian
vals3 = _disproportionality_index(covid_rows.iloc[:, 15], population_rows.iloc[:, 15])
#HS
vals4 = _disproportionality_index(covid_rows.iloc[:, 17], population_rows.iloc[:, 17])

data = {'FIPS': codes, 'State': states, 'County Name':counties, 'NonHSWhite':vals1, 'NonHSBlack':vals2,'NonHSAsian':vals3,'HS':vals4}
df2 = pd.DataFrame(data)
# The DataFrame index is written too; it comes back as an unnamed first column.
df2.to_csv('output3.csv')
#Examine output file
df2.head(10)
| | FIPS | State | County Name | NonHSWhite | NonHSBlack | NonHSAsian | HS |
|---|---|---|---|---|---|---|---|
| 2 | 2020 | AK | Anchorage Municipality | 0.582734 | 0.000000 | 1.547368 | 0.000000 |
| 5 | 1003 | AL | Baldwin County | 1.037260 | 1.279070 | 0.000000 | 0.000000 |
| 8 | 1015 | AL | Calhoun County | 1.100559 | 0.947619 | 0.000000 | 0.000000 |
| 11 | 1033 | AL | Colbert County | 1.015444 | 1.037500 | 0.000000 | 0.000000 |
| 14 | 1039 | AL | Covington County | 0.993983 | 1.330645 | 0.000000 | 0.000000 |
| 17 | 1043 | AL | Cullman County | 1.061135 | 0.000000 | 0.000000 | 0.000000 |
| 20 | 1047 | AL | Dallas County | 1.125926 | 0.977240 | 0.000000 | 0.000000 |
| 23 | 1049 | AL | DeKalb County | 1.208020 | 0.000000 | 0.000000 | 0.000000 |
| 26 | 1055 | AL | Etowah County | 1.091731 | 0.948387 | 0.000000 | 0.000000 |
| 29 | 1069 | AL | Houston County | 1.051515 | 1.025830 | 0.000000 | 0.617647 |
import json
import os
from urllib.request import urlopen

# Mapbox access token used by the map figures. Set the MAPBOX_ACCESS_TOKEN
# environment variable to supply your own; the literal below is only the
# fallback that was originally hard-coded here.
token = os.environ.get(
    "MAPBOX_ACCESS_TOKEN",
    "pk.eyJ1Ijoic3MtMTIzIiwiYSI6ImNraXl4MjU1eTB5ajkyd21vNGlzMW9ucnYifQ.pg8uh9S1bMrPuKlWKsPNlQ",
)  # you will need your own token

# Download the US county boundaries GeoJSON from the plotly datasets repo.
# NOTE: this rebinds `counties`, which earlier held the county-name column.
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
    counties = json.load(response)
df = pd.read_csv("output3.csv")
import plotly.graph_objects as go

# FIPS codes come back from the CSV as integers (e.g. 1003), but the county
# GeoJSON identifies features by 5-character zero-padded strings ("01003").
# Without padding, counties in states whose FIPS prefix is below 10 never
# match a feature and are left off the map.
fips_codes = df.FIPS.astype(str).str.zfill(5)

# One map per sub-group: (ratio column in df, plotly colorscale).
choropleth_layers = [
    ("NonHSWhite", "Viridis"),
    ("NonHSBlack", "Plotly3"),
    ("NonHSAsian", "OrRd"),
    ("HS", "Turbo"),
]

for ratio_column, colorscale in choropleth_layers:
    # The colour range is fixed at 0-3 so the four maps are directly comparable.
    fig = go.Figure(go.Choroplethmapbox(geojson=counties, locations=fips_codes, z=df[ratio_column],
                                        colorscale=colorscale, zmin=0, zmax=3, marker_line_width=0))
    # Centre the view on the contiguous United States.
    fig.update_layout(mapbox_style="light", mapbox_accesstoken=token,
                      mapbox_zoom=3, mapbox_center = {"lat": 37.0902, "lon": -95.7129})
    fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
    fig.show()
import plotly.express as px

# Ratio columns of df, one per population sub-group, in plotting order.
group_columns = ["NonHSWhite", "NonHSBlack", "NonHSAsian", "HS"]

# Bar chart of all four sub-group ratios by state, on a categorical x axis.
fig = px.bar(df, x=df["State"], y=[df[group_column] for group_column in group_columns])
fig.update_xaxes(type='category')
fig.show()

# One scatter per sub-group: the ratio drives the y position, the colour and
# the marker size at the same time.
for group_column in group_columns:
    ratio = df[group_column]
    fig = px.scatter(df, x=df["State"], y=ratio, color=ratio, size=ratio)
    fig.show()
For more information on healthcare disparities, health equity and outcomes, please visit the following links:
https://www.cdc.gov/coronavirus/2019-ncov/community/health-equity/race-ethnicity.html